# -*- coding:utf-8 -*-
# @Author: shenyuyu
# @Time: 2023/6/26 16:29
# @File: qu_10.py

"""
2、用户和关键词组合分析Top5
[('6185822016522959_scala', 2016), ('41641664258866384_博学', 1372), ('41641664258866384_节点', 1372), ('44801909258572364_hadoop', 1260), ('7044693659960919_数据', 1120)]
"""

from pyspark import SparkConf, SparkContext

def user_keyword_pairs(line: str) -> list:
    """Turn one raw input line into zero or one ``(key, 1)`` counting pairs.

    The line is split on tab characters and the key is built by joining the
    second and third fields with an underscore (this analysis treats them as
    the user ID and the search keyword).

    Args:
        line: One line of the tab-separated input file.

    Returns:
        ``[(key, 1)]`` when the line has at least three fields, otherwise an
        empty list, so that blank or truncated lines are skipped by
        ``flatMap`` instead of failing the whole job with an ``IndexError``.
    """
    fields = line.split("\t")
    if len(fields) < 3:
        return []
    return [(fields[1] + "_" + fields[2], 1)]


def main() -> None:
    """Count each user/keyword combination and print the five most frequent."""
    conf = SparkConf().setAppName("a").setMaster("local[*]")
    sc = SparkContext(conf=conf)
    try:
        lines = sc.textFile("file:///tmp/pycharm_project_161/data/SogouQ.txt")
        # One (key, 1) pair per well-formed line, summed per key.
        counts = lines.flatMap(user_keyword_pairs).reduceByKey(lambda a, b: a + b)
        # Highest count first; only the first five entries are brought back
        # to the driver.
        ranked = counts.sortBy(lambda kv: kv[1], ascending=False)
        print(ranked.take(5))
    finally:
        # Release the Spark context even when a job step raises.
        sc.stop()


if __name__ == '__main__':
    main()